---
geometry: margin = 0.7in
output: 
  pdf_document:
    keep_tex: true
---

\pagenumbering{gobble}

```{r setup, warning = FALSE, message = FALSE, echo = F}
library(tidyverse)
library(stargazer)
library(xtable)
library(RColorBrewer)
library(gridExtra)
library(stringr)
library(Zelig)
options(xtable.comment = FALSE)
options(xtable.table.placement = "!h")
library(kableExtra)
library(foreign)

############################### DATA CLEANING #########################################

# Load the public-use data file: download and unzip "Feb15 Data Release" from
# Pew's website and point `data_dir` at the folder that holds the .sav file.
# A bare `setwd()` (no argument) stops the script with
# 'argument "dir" is missing, with no default', so the location is kept in a
# path variable instead of changing the working directory.
data_dir <- "."
public_path <- file.path(data_dir, "Feb15 Multiracial_cleaned.sav")
if (!file.exists(public_path)) {
  stop("Cannot find '", public_path,
       "'; set `data_dir` to the folder containing the Pew data release.",
       call. = FALSE)
}

# Read the SPSS file as a data frame, keeping the numeric codes (no value
# labels) because every recode below compares against the raw codes.
public <- foreign::read.spss(public_path,
                             to.data.frame = TRUE,
                             use.value.labels = FALSE)

# Load the dataset containing new variables constructed from the public-use
# data and the write-in responses in the restricted-use data.
new <- read.csv("~/new_variables.csv")

# Merge by CaseID, keeping every row of the public-use file.
data <- left_join(public, new, by = 'CaseID')

# DEMOGRAPHIC RECODES
# All four variables are derived in one step and appended in this order:
# ppgender, region, age, educ2cat.
data <- data %>%
  mutate(
    # gender: code 1 = Male (reference), 2 = Female
    ppgender = factor(PPGENDER, levels = c(1, 2), labels = c("Male", "Female")),
    # region: 1 = Northeast, 2 = Midwest, 3 = South, 4 = West; South is the reference
    region = relevel(factor(PPREG4,
                            labels = c("Northeast", "Midwest", "South", "West")),
                     ref = "South"),
    # age: shifted so that 0 corresponds to 18, the youngest possible respondent
    age = PPAGE - 18,
    # education: PPEDUCAT == 4 marks "BA plus"; everything else is "No BA"
    educ2cat = factor(PPEDUCAT == 4,
                      levels = c(FALSE, TRUE),
                      labels = c("No BA", "BA plus"))
  )

# Household income: replace each PPINCIMP category with its midpoint, express
# it in thousands of dollars, and take the log.
# The midpoint for the open-ended top category comes from a Pareto-based
# formula (Hout 2004), using 681 observations in the second-highest category
# and 1162 observations in the highest category.
V <- (log(681 + 1162) - log(1162))/(log(175000) - log(150000))
M <- 175000*0.5*(1 + (V/(V - 1)))

# Lower and upper bounds of categories 2 through 18.
bracket_lower <- c(5000, 7500, 10000, 12500, 15000, 20000, 25000, 30000, 35000,
                   40000, 50000, 60000, 75000, 85000, 100000, 125000, 150000)
bracket_upper <- c(7499, 9999, 12499, 14999, 19999, 24999, 29999, 34999, 39999,
                   49999, 59999, 74999, 84999, 99999, 124999, 149999, 174999)

# Position k holds the dollar midpoint of category k (1 = bottom, 19 = top).
income_midpoints <- c(2500,
                      (bracket_upper - bracket_lower)/2 + bracket_lower,
                      M)

# match() returns NA for any code outside 1..19, so those rows stay missing.
# Dividing by 1000 expresses income in thousands.
data$income <- income_midpoints[match(data$PPINCIMP,
                                      seq_along(income_midpoints))]/1000
# logged household income
data$logged.income <- log(data$income)

# survey language (reference: English)
data$xspanish <- as.factor(data$XSPANISH)

###############################################################################
# Ancestry indicators
# any_X is 1 when the respondent selected category X for mom (QR2), dad (QR3)
# or grandparents (QR4), or said that great-grandparents/earlier ancestors were
# of a different race (QR5 == 1) and picked category X for them (QR5x == 1).
# Otherwise any_X is 0; a missing answer never counts as a selection.

# Output column -> item-letter suffix used in the QR2-QR5 questions.
ancestry_suffix <- c(any_white  = "a",   # White
                     any_latino = "b",   # Hispanic/Latino
                     any_black  = "c",   # Black
                     any_asian  = "d",   # Asian
                     any_indian = "e",   # American Indian
                     any_nhopi  = "f",   # Native Hawaiian or Pacific Islander
                     any_other  = "g")   # Other

for (flag_name in names(ancestry_suffix)) {
  sfx <- ancestry_suffix[[flag_name]]
  selected <- data[[paste0("QR2", sfx)]] == 1 |
    data[[paste0("QR3", sfx)]] == 1 |
    data[[paste0("QR4", sfx)]] == 1 |
    (data$QR5 == 1 & data[[paste0("QR5", sfx)]] == 1)
  # `%in% TRUE` maps NA to FALSE, so unresolved comparisons count as 0
  data[[flag_name]] <- as.numeric(selected %in% TRUE)
}

################################################################################################
# Racial ancestry regimes
# case_when() evaluates its branches in order, so each branch applies only to
# respondents not claimed by an earlier one. That gives the precedence
# Black > Hispanic > Asian > Indigenous (American Indian or NHOPI) > Other.
# The any_* flags are always 0 or 1, so the final branch catches exactly the
# respondents with none of the five ancestries above.
data <- data %>%
  mutate(regime = case_when(any_black == 1 ~ "Black",
                            any_latino == 1 ~ "Hispanic",
                            any_asian == 1 ~ "Asian",
                            any_indian == 1 | any_nhopi == 1 ~ "Indigenous",
                            TRUE ~ "Other"))

## convert to factor and make "Other" the reference category
data$regime <- fct_relevel(factor(data$regime),
                           "Other", "Black", "Hispanic", "Asian", "Indigenous")

########################################################################################
# Outcome variables

################################## 
# Reports Multiracial Ancestry 

# If R refused any of the MOM, DAD, GRANDPARENT or ANCESTOR ancestry items we
# cannot tell the presence and timing of multiracial ancestry, so R is flagged
# as incomplete (n = 95).

# Items QR2a-g (mom), QR3a-g (dad) and QR4a-g (grandparents); 9 = refused.
relative_items <- paste0("QR", rep(2:4, each = 7), rep(letters[1:7], times = 3))
refused_relative <- rowSums(data[relative_items] == 9, na.rm = TRUE) > 0
# If QR5a is missing, QR5b-g are missing too, so checking QR5a is enough.
refused_ancestor <- (data$QR5 == 1 & data$QR5a == 9) %in% TRUE
data$ancestry_incomplete <- as.numeric(refused_relative | refused_ancestor)

#### Respondents report multiracial ancestry when two or more racial
#### categories appear anywhere in their ancestry ...
data <- data %>%
  mutate(total_ancestry_races = any_white + any_black + any_latino + any_asian +
                                any_indian + any_nhopi + any_other,
         reportsMR = ifelse(total_ancestry_races >= 2, 1, 0),
         # ... PLUS those who said "yes" to other ancestry (QR5 == 1) without
         # specifying a category, i.e. one category across all listed relatives
         # but earlier ancestors of another race
         reportsMR = ifelse(QR5 == 1, 1, reportsMR),
         # incomplete-ancestry cases are set to missing
         reportsMR = ifelse(ancestry_incomplete == 1, NA, reportsMR))

##########################################################
### Multiracial Self-ID (Selects 2 or more races for self)

# Recode Refused (9) to missing on every self-identification item.
# (if QR1a == 9 for a respondent, then QR1b-g is also 9; R refused to answer QR1)
# `%in%` is used instead of `== 9` inside a data-frame subscript: a logical
# subscript containing NA makes `[<-.data.frame` fail, whereas `%in%` never
# returns NA, so an item that is already missing is left as it is.
self_id_items <- paste0("QR1", letters[1:7])
for (item in self_id_items) {
  data[[item]][data[[item]] %in% 9] <- NA
}

# Sum the number of races reported (NA if the respondent refused QR1), then
# code MR self-ID: 1 if the respondent reports 2 or more racial categories for
# self, 0 if not, NA if the count is missing.
data <- data %>%
  mutate(num_races = QR1a + QR1b + QR1c + QR1d + QR1e + QR1f + QR1g,
         MRid = ifelse(num_races >= 2, 1, 0))

### recoding write-in responses
# R's who self-identify as "Some Other Race" may fall into the following scenarios:
# 1) If they self-id as SOR along with TWO OR MORE other categories (QR1g == 1 & num_races >= 3) they are coded as MR-identifying

# 2) If they self-id as SOR along with ONE other category (QR1g == 1 & num_races == 2), then there are two possibilities:
#    A) The write-in response was irrelevant or redundant with the other category they selected 
#       (e.g. write-in was "German" when they checked "White" and "Some Other Race"), so the "race_R" variable was recoded 
#       to a single race (one character long string).
#    B) The write-in response indicates that the respondent was expressing multiracial identity. In 
#       these cases, the "race_R" variable would be recoded to indicate 2+ races/origins and would be 2+ characters long.

# 3) If they self-id as SOR ONLY (QR1g == 1 & num_races == 1), then there are two possibilities:
#    A) The write-in response did NOT indicate multiracial self-identification. In these cases, 
#       the "race_R" variable would remain blank.
#    B) The write-in response DID indicate multiracial self-identification. In these cases, the "race_R"
#       variable would be recoded to a 2+ character string indexing the categories their write-in response mentioned. 


# Cases that belong to 1) do not need to be recoded because they are counted as multiracially self-identifying anyway.
# Cases that belong to 2A) NEED TO BE CODED BACK TO MONORACIAL because they would otherwise be counted as multiracial self-id in our coding scheme, even though manual review indicates that they are not expressing multiracial identity.
# Cases that belong to 2B) do not need to be recoded because they are counted as multiracial self-id in our coding scheme and manual review agrees.
# Cases that belong to 3A) do not need to be recoded because they are counted as monoracial self-id in our coding scheme and manual review agrees.
# Cases that belong to 3B) NEED TO BE CODED TO MULTIRACIAL because they are counted as monoracial in our coding scheme, but manual review indicates that they are expressing multiracial identity.

# 2A: "Some Other Race" plus exactly one other category, where the reviewed
# write-in collapsed to a single race (race_R is one character long).
# These go back to monoracial.
monoracial_cases <- data %>%
  filter(num_races == 2 & QR1g == 1) %>%
  filter(nchar(race_R) == 1) %>%
  pull(CaseID)
data <- data %>%
  mutate(MRid = replace(MRid, CaseID %in% monoracial_cases, 0))

# 3B: "Some Other Race" only, where the reviewed write-in left a non-blank
# race_R. These become multiracial.
multiracial_other_cases <- data %>%
  filter(num_races == 1 & QR1g == 1) %>%
  filter(nchar(race_R) >= 1) %>%
  pull(CaseID)
data <- data %>%
  mutate(MRid = replace(MRid, CaseID %in% multiracial_other_cases, 1))


# turn the 0/1 indicator into a labelled factor
data$MRid <- factor(data$MRid,
                    levels = c(0, 1),
                    labels = c("Selects 1 race", "Selects 2+ races"))

########### Combine MR awareness and identification to one variable (useful for some descriptives)
# Every category has an explicit condition. With a `TRUE ~` catch-all,
# respondents whose ancestry report (reportsMR) or self-identification (MRid)
# is missing would be labelled "Multiracial Ancestry and Self-identification";
# here they stay NA instead. Respondents with reportsMR == 0 are "Monoracial
# Ancestry" regardless of self-identification.
aware_id_levels <- c("Monoracial Ancestry",
                     "Multiracial Ancestry;\nMonoracial Self-identification",
                     "Multiracial Ancestry and\nSelf-identification")
data <- data %>%
  mutate(AwareID = case_when(
           reportsMR == 0 ~ aware_id_levels[1],
           reportsMR == 1 & MRid == "Selects 1 race" ~ aware_id_levels[2],
           reportsMR == 1 & MRid == "Selects 2+ races" ~ aware_id_levels[3]),
         # fix the level order used in descriptive output
         AwareID = factor(AwareID, levels = aware_id_levels))

#########################################################################################################
# Generation recode (following the previous RA's approach, but counting
# "Some Other Race" as a category of its own).

# Step 1: QR#gz holds "O" (for "Other") when Some Other Race was selected for
#         that relative, and NA otherwise.
# Step 2: new_race_* is the existing race_* string with "O" appended when the
#         marker is present, and the unchanged race_* string when it is not.
# Note: if the respondent didn't select QR#a-f (e.g. only SOR was selected),
# race_mom/dad/grand/great will be blank.
data <- data %>%
  mutate(QR2gz = ifelse(QR2g == 1, "O", NA),
         QR3gz = ifelse(QR3g == 1, "O", NA),
         QR4gz = ifelse(QR4g == 1, "O", NA),
         QR5gz = ifelse(QR5g == 1, "O", NA),
         new_race_mom = ifelse(is.na(QR2gz), race_mom,
                               paste0(race_mom, QR2gz)),
         new_race_dad = ifelse(is.na(QR3gz), race_dad,
                               paste0(race_dad, QR3gz)),
         new_race_grand = ifelse(is.na(QR4gz), race_grand,
                                 paste0(race_grand, QR4gz)),
         new_race_great = ifelse(is.na(QR5gz), race_great,
                                 paste0(race_great, QR5gz)))

# Generation dummy variables.
# Builds gen0 (monoracial ancestry) and gen1 (first-generation multiracial),
# then collapses them into gens2cat: 0 = monoracial, 1 = first generation,
# 2 = everyone else, NA = incomplete ancestry information.
data <- data %>%
########## identify monoracial (0 generations)
# if one racial category is reported in ancestry AND does NOT answer "yes" to QR5 
# (same results as matching string of race_mom, race_dad, etc.)
  mutate(gen0 = ifelse(total_ancestry_races == 1 & QR5 != 1, 1, 0),
########## identify 1st gen
# if both parents are single race (one-character race string) AND those races
# are different AND QR5 != 1
         gen1 = ifelse(new_race_mom != new_race_dad &
                         nchar(new_race_mom) == 1 &
                         nchar(new_race_dad) == 1 &
                         QR5 != 1, 1, 0))
# but these include cases where the grandparents have a race that is not in either parent;
# collect the CaseIDs of those cases so they can be removed from gen1 below
grand_notinpar_cases <- data %>%
  filter(gen1 == 1) %>%
  # put a space after every character of the grandparent race string
  # (trimws drops the trailing space), e.g. "WB" becomes "W B"
  mutate(race_grand_space = trimws(gsub('(.{1})', '\\1 ',new_race_grand))) %>%
  # one row per grandparent race character; flag == 1 if that race matches
  # neither mom's nor dad's (single-character) race
  separate_rows(race_grand_space, sep = " ") %>%
  mutate(grand_new_flag = ifelse(race_grand_space == new_race_mom | race_grand_space == new_race_dad,
                              0, 1)) %>%
  # keep CaseIDs with at least one grandparent race that is new relative to the parents
  # NOTE(review): sum() has no na.rm here, so a case with an NA flag gets an NA
  # total and is dropped by the filter (it stays gen1) - confirm this is intended
  group_by(CaseID) %>%
  summarise(grand_new_races = sum(grand_new_flag)) %>%
  filter(grand_new_races >= 1) %>%
  select(CaseID)
# recode these cases as NOT first gen
data$gen1 <- ifelse(data$CaseID %in% unlist(grand_notinpar_cases), 0, data$gen1)
  
# create generation variable - everyone who is not gen 0 or gen 1 is 2
# (case_when treats an NA condition as not matched, so rows with NA gen0/gen1
# also fall through to 2)
data <- data %>% mutate(gens2cat = case_when(gen0 == 1 ~ 0,
                                             gen1 == 1 ~ 1,
                                             TRUE ~ 2))
# unless you have incomplete ancestry info, then you are missing
# (ancestry_incomplete is 0/1 and is used directly as the logical test)
data$gens2cat <- ifelse(data$ancestry_incomplete, NA, data$gens2cat)

# how many checked "other" for earlier ancestors?
#data %>% filter(ancestry_incomplete == 0 & !is.na(MRid) & QR1g == 1) %>% 
#  summarise(n())
# 292 people checked other (1.3% of sample) 

# Note: 278 people (1.2% of sample) wrote something in (based on restricted data access from Pew)
                
########################################### FOOTNOTE 4 #################################################
##### what percentage of people missing ancestry/self ID are men or women?
#data %>% mutate(missing = ifelse(data$ancestry_incomplete == 1 | is.na(data$num_races), 1, 0)) %>% 
#  filter(missing == 1) %>% 
#  group_by(ppgender) %>% 
#  summarise(n())
# 50 out of 103 are men (48.5%)

########################################### FOOTNOTE 5 ##################################################
# how many said yes to QR5 (having diff race great-grandp or ancestors) but only marked QR5g, Some Other Race?
#nrow(data %>% filter(ancestry_incomplete == 0 & QR5 == 1 & 
#                  QR5a == 0 & QR5b == 0 & QR5c == 0 & QR5d == 0 &
#                  QR5e == 0 & QR5f == 0 & QR5g == 1))
#154 cases, or 0.7% of sample

# how many did not report multiracial ancestry but self-identified as multiracial?
#nrow(data %>% filter(reportsMR == 0 & MRid == "Selects 2+ races"))
# 35 cases, or 0.2% of sample
########################################### FOOTNOTE 6 ##################################################
# multiracial cases that were recoded (and not later dropped due to incomplete ancestry)
#nrow(data %>% filter(ancestry_incomplete == 0 & CaseID %in% multiracial_other_cases))

# monoracial cases that were recoded (and not later dropped due to incomplete ancestry)
#nrow(data %>% filter(ancestry_incomplete == 0 & CaseID %in% monoracial_cases))
```

```{r, echo = F}
#### ANALYTIC SAMPLES

# Full sample: complete ancestry information and a non-missing self-ID.
data <- data %>%
  filter(ancestry_incomplete == 0 & !is.na(MRid))

# Multiracial-ancestry subsample, with the generation variable stored as a
# factor for use in the models.
MR <- data %>%
  filter(reportsMR == 1) %>%
  mutate(gens2cat = as.factor(gens2cat))
```



```{r, echo = F, message = F, warning = F, results = 'asis'}
########################## TABLES ###########################

# Table 1: descriptive table

#round(prop.table(table(data$gens2cat))*100, 1)
#round(prop.table(table(data$MRid))*100, 1)
#round(prop.table(table(data$ppgender))*100, 1)
#round(prop.table(table(data$regime))*100, 1)
#round(prop.table(table(data$region))*100, 1)
#mean((data$age)) + 18
#summary(data$age) +18
#mean(data$logged.income)
#summary(data$logged.income)
#round(prop.table(table(data$educ2cat))*100, 1)
#round(prop.table(table(data$xspanish))*100, 1)

#round(prop.table(table(MR$gens2cat))*100, 1)
#round(prop.table(table(MR$MRid))*100, 1)
#round(prop.table(table(MR$ppgender))*100, 1)
#round(prop.table(table(MR$regime))*100, 1)
#round(prop.table(table(MR$region))*100, 1)
#mean(MR$age) + 18
#summary(MR$age) +18
#mean(MR$logged.income)
#summary(MR$logged.income)
#round(prop.table(table(MR$educ2cat))*100, 1)
#round(prop.table(table(MR$xspanish))*100, 1)

# Table 1 is typed in by hand from the commented-out summaries above.
# Each row is: label, full-sample value, multiracial-ancestry-sample value.
table1_cells <- rbind(
  c("Monoracial",                               "80.9",          "-"),
  c("Multiracial: First generation",            "2.7",           "14.3"),
  c("Multiracial: Second generation or higher", "16.4",          "85.7"),
  c("Selects one race",                         "95.0",          "74.5"),
  c("Selects two or more races",                "5.0",           "25.5"),
  c("Female",                                   "51.0",          "55.2"),
  c("Male",                                     "49.0",          "44.8"),
  c("Black",                                    "7.8",           "23.1"),
  c("Hispanic",                                 "11.7",          "26.4"),
  c("Asian",                                    "2.5",           "3.7"),
  c("Indigenous",                               "7.4",           "38.3"),
  c("Residual (White/other)",                   "70.6",          "8.4"),
  c("Northeast",                                "17.2",          "12.6"),
  c("Midwest",                                  "25.6",          "19.1"),
  c("West",                                     "24.5",          "27.2"),
  c("South",                                    "32.8",          "41.0"),
  c("Age in years",                             "54.6 (18-94)",  "50.0 (18-89)"),
  c("Logged income in thousands",               "3.9 (0.9-5.4)", "3.7 (0.9-5.4)"),
  c("Bachelor's degree or higher",              "45.2",          "38.8"),
  c("Took survey in Spanish",                   "4.6",           "7.0")
)
Variable <- table1_cells[, 1]
Mean <- table1_cells[, 2]
MR_Mean <- table1_cells[, 3]

desp_table <- data.frame(cbind(Variable, Mean, MR_Mean))
# the first column has no header; both value columns share the same header text
names(desp_table) <- c("", "% or \nMean (Range)", "% or \nMean (Range)")

# Render as a LaTeX table: one header cell per sample, rows grouped by topic.
kable(desp_table, "latex", booktabs = TRUE,
      caption = "Descriptive Statistics") %>%
  kable_styling(position = "center", latex_options = c("HOLD_position")) %>%
  add_header_above(
    c(" ",
      "Full Sample \n(N = 22,616)" = 1,
      "Multiracial Ancestry \n(N = 4,330)" = 1),
    bold = TRUE
  ) %>%
  group_rows("Self-Reported Ancestry", 1, 3) %>%
  group_rows("Self-Identification", 4, 5) %>%
  group_rows("Gender", 6, 7) %>%
  group_rows("Racial Classification Regime", 8, 12) %>%
  group_rows("Region", 13, 16) %>%
  group_rows("Other Demographics", 17, 20) %>%
  column_spec(1, width = "8cm") %>%
  footnote(general = "Pew Research Center’s 2015 Survey of Multiracial Adults",
           general_title = "Source:")
```

\newpage
```{r, echo = F, message = F, warning = F, results = 'asis'}
# Table 2: Regime Descriptions
# Static two-column table: regime label and the rule that assigns respondents
# to it. Two wording defects in the printed text are corrected here: the
# duplicated "not" in the Indigenous description, and the residual label now
# has the same spacing as in Table 1 ("Residual (White/other)").
Regime <- c("Black", "Hispanic", "Asian", "Indigenous", "Residual (White/other)")
Description <- c(
  "Respondents who reported black or African American ancestry for any of their parents, grandparents and/or earlier ancestors",
  "Respondents who reported Hispanic ancestry but did not report black ancestry",
  "Respondents who reported Asian ancestry but did not report black or Hispanic ancestry",
  "Respondents who reported American Indian, Native Hawaiian, or Pacific Islander ancestry but did not report black, Hispanic, or Asian ancestry",
  "Respondents who were not assigned to one of the above regimes. In the full sample, 99% of people in this category have some white ancestry, and the majority does not report any other ancestry. In the aware subsample, people in this category have “White” and “Some Other Race” ancestry")

regime_descriptions <- data.frame(cbind(Regime, Description))

# Render as a striped LaTeX table with fixed column widths so the long
# descriptions wrap.
kable(regime_descriptions, "latex", booktabs = TRUE,
      caption = "Racial Classification Regimes") %>%
  kable_styling(position = "center",
                latex_options = c("striped", "HOLD_position")) %>%
  column_spec(1, width = "2.5 cm") %>%
  column_spec(2, width = "13.5 cm")
```

\newpage

```{r, echo = F, message = F, warning = F, results = 'asis'}
# Table 3: ID and Awareness by Regime

# Number of full-sample respondents in each regime
regime_N <- data %>%
  group_by(regime) %>%
  summarise(N = n())

# Within each regime, the share of respondents in each reportsMR group,
# formatted as a percentage string with one decimal. Only the row for those
# reporting multiracial ancestry (reportsMR == 1) is kept.
aware_regime_table <- data %>%
  group_by(regime, reportsMR) %>%
  summarise(Freq = n()) %>%
  group_by(regime) %>%
  mutate(TotalAware = paste0(format(round(100 * Freq/sum(Freq), 1), nsmall = 1),
                             "%")) %>%
  filter(reportsMR == 1) %>%
  select(regime, TotalAware)

# One row per regime with N and awareness share; the residual "Other" regime
# is left out of the table.
regime_outcomes <- regime_N %>%
  left_join(aware_regime_table, by = "regime") %>%
  filter(regime != "Other")

# Regime-specific self-ID among respondents with multiracial ancestry:
# as multiracial ("MR"), as the race of the regime ("RegimeRace"), or as
# another single race ("Other").
MRblacks <- filter(MR, regime == "Black")
MRlatinos <- filter(MR, regime == "Hispanic")
MRasians <- filter(MR, regime == "Asian")
MRindig <- filter(MR, regime == "Indigenous")

# Percentage of one regime's respondents in each self-ID category.
#
# regime_df:          rows of MR belonging to a single regime.
# picked_regime_race: logical vector, one element per row of regime_df; TRUE
#                     when the respondent selected the regime's own race for
#                     themselves.
# Returns a named character vector c(MR = , Other = , RegimeRace = ) of
# percentages with one decimal and a trailing "%". The categories are fixed
# factor levels, so all three names are always present and in the same order
# even when a category is empty. That keeps the four vectors aligned when they
# are later stacked by position with rbind().
regime_id_shares <- function(regime_df, picked_regime_race) {
  mono_id <- case_when(
    regime_df$MRid == "Selects 2+ races" ~ "MR",
    regime_df$MRid == "Selects 1 race" & picked_regime_race ~ "RegimeRace",
    TRUE ~ "Other")
  counts <- table(factor(mono_id, levels = c("MR", "Other", "RegimeRace")))
  shares <- format(round(100 * as.numeric(counts)/sum(counts), 1), nsmall = 1)
  setNames(paste0(shares, "%"), names(counts))
}

# blacks: regime race is QR1c
mono_blacks <- regime_id_shares(MRblacks, MRblacks$QR1c == 1)

# hispanics: regime race is QR1b
mono_latinos <- regime_id_shares(MRlatinos, MRlatinos$QR1b == 1)

# asians: regime race is QR1d
mono_asians <- regime_id_shares(MRasians, MRasians$QR1d == 1)

# indigenous: regime race is QR1e or QR1f
mono_indig <- regime_id_shares(MRindig, MRindig$QR1e == 1 | MRindig$QR1f == 1)

# Stack the four regime vectors (rows: Black, Hispanic, Asian, Indigenous -
# the same order as the rows of regime_outcomes) and attach them, together
# with each regime's multiracial-ancestry N, to the awareness table.
mono_ID <- data.frame(rbind(mono_blacks,
                            mono_latinos,
                            mono_asians,
                            mono_indig))
regime_outcomes <- regime_outcomes %>%
  mutate(MR_N = c(nrow(MRblacks),
                  nrow(MRlatinos),
                  nrow(MRasians),
                  nrow(MRindig)),
         MR = mono_ID$MR,
         RegimeRace = mono_ID$RegimeRace,
         Other = mono_ID$Other)

# display headers ("N" appears twice: full-sample N and multiracial-ancestry N)
names(regime_outcomes) <- c("Regime",
                            "N",
                            "Aware of MR Ancestry",
                            "N",
                            "2+ Races",
                            "Regime Race",
                            "Another Race")

# Render Table 3
kable(regime_outcomes, "latex", booktabs = TRUE,
      caption = "Multiracial Awareness and Self-Identification by Racial Regime") %>%
  kable_styling(position = "center",
                latex_options = c("striped", "HOLD_position")) %>%
  add_header_above(
    c(" " = 1,
      "In Full Sample \n(N = 22,616)" = 2,
      "Self-Identification among those with MR Ancestry \n(N = 4,330)" = 4),
    bold = TRUE
  ) %>%
  column_spec(1, width = "2.5 cm") %>%
  column_spec(2, width = "1.2 cm") %>%
  column_spec(4, width = "1.2 cm") %>%
  footnote(general = "Pew Research Center’s 2015 Survey of Multiracial Adults",
           general_title = "Source:",
           footnote_as_chunk = TRUE)
```


\newpage

```{r, echo = F, message = F, results = 'asis'}
# Table 4: logistic regressions predicting awareness (reportsMR) and
# self-identification (MRid) - no interactions.
# Each outcome is fit first without regime and then with regime added.
# NOTE: the order of terms in each formula determines the order of the
# coefficients, which the hard-coded covariate labels in the stargazer call
# below rely on; keep the term order and the labels in sync.

# awareness, full analytic sample, controls only
aware_fit <- glm(reportsMR ~ ppgender + region + age + logged.income + educ2cat + xspanish, 
                 data = data,
                 family = binomial)

# awareness, full analytic sample, adding regime
aware_fit_regime <- glm(reportsMR ~ ppgender + regime + region + age + logged.income + 
                          educ2cat + xspanish, 
                 data = data,
                 family = binomial)

# self-identification, multiracial-ancestry subsample (MR), controls only
id_fit <- glm(MRid ~ ppgender + region + age + logged.income + educ2cat + xspanish,
              data = MR,
              family = binomial)

# self-identification, multiracial-ancestry subsample (MR), adding regime
id_fit_regime <- glm(MRid ~ ppgender + regime + region +  age + logged.income + educ2cat + xspanish,
                  data = MR,
                 family = binomial)

# Standard errors on the odds-ratio scale for a fitted model.
# For each coefficient b with sampling variance v (the diagonal of
# vcov(model)), returns sqrt(exp(b)^2 * v), i.e. the delta-method standard
# error of exp(b). The result is an unnamed numeric vector in coefficient
# order.
get.or.se <- function(model) {
  coef_tbl <- broom::tidy(model)
  # tibble column assignment errors if the variance vector does not line up
  # with the coefficient table
  coef_tbl$var.diag <- diag(vcov(model))
  odds_ratio <- exp(coef_tbl$estimate)
  unname(sqrt(odds_ratio^2 * coef_tbl$var.diag))
}

# Odds-ratio standard errors, one vector per model, in column order
table4_or_se <- list(get.or.se(aware_fit),
                     get.or.se(aware_fit_regime),
                     get.or.se(id_fit),
                     get.or.se(id_fit_regime))

# Fit statistics printed as extra rows at the bottom of the table
table4_fit_rows <- list(
  c("Log Likelihood",
    round(logLik(aware_fit), 0),
    round(logLik(aware_fit_regime), 0),
    round(logLik(id_fit), 0),
    round(logLik(id_fit_regime), 0)),
  c("Akaike Inf. Crit.",
    round(AIC(aware_fit), 0),
    round(AIC(aware_fit_regime), 0),
    round(AIC(id_fit), 0),
    round(AIC(id_fit_regime), 0)),
  c("Bayesian Inf. Crit.",
    round(BIC(aware_fit), 0),
    round(BIC(aware_fit_regime), 0),
    round(BIC(id_fit), 0),
    round(BIC(id_fit_regime), 0))
)

# Row labels, in the order the coefficients appear in the table
table4_labels <- c("Female",
                   "Black", "Hispanic", "Asian", "Indigenous",
                   "Northeast", "Midwest", "West", "Age",
                   "Logged Income", "BA or higher", "Spanish Version",
                   "Intercept")

# Coefficients are exponentiated for display (apply.coef = exp). t.auto and
# p.auto are switched off so stargazer does not recompute test statistics from
# the transformed coefficients and the supplied standard errors.
stargazer(aware_fit, aware_fit_regime, id_fit, id_fit_regime,
          title = "Odds of Multiracial Ancestry Awareness and Self-Identification",
          header = FALSE,
          digits = 3,
          intercept.bottom = TRUE,
          t.auto = FALSE,
          p.auto = FALSE,
          apply.coef = exp,
          se = table4_or_se,
          dep.var.labels = c("Aware of MR Ancestry",
                             "Self-Identifies with Multiple Races"),
          column.labels = c("Gender + Controls", "+ Regime",
                            "Gender + Controls", "+ Regime"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = table4_fit_rows,
          covariate.labels = table4_labels,
          keep.stat = c("n"))
```


\newpage

```{r, echo = F, message = F, results = 'asis'}
# Table 5: self-identification (MRid) models on the multiracial-ancestry
# subsample (MR): generation added linearly, then gender x generation, then
# the three-way gender x generation x regime interaction.
# NOTE: the stargazer call below reorders coefficients by position (`order`)
# and labels them by hand, so the term order in these formulas must not change
# without updating that call.

# generation as an additive term
id_gen_fit <- glm(MRid ~ gens2cat + ppgender + regime + 
                    age + region + educ2cat + logged.income + xspanish,
                  data = MR,
                  family = binomial)

# gender x generation interaction
id_gender_gen_fit <- glm(MRid ~ ppgender*gens2cat + regime + 
                           age + region + educ2cat + logged.income + xspanish, 
                         data = MR,
                         family = binomial)

# full gender x generation x regime interaction (includes all lower-order terms)
three_way_fit <- glm(MRid ~ ppgender*gens2cat*regime + 
                       age + region + educ2cat + logged.income + xspanish,
                     data = MR,
                     family = binomial)

# Standard errors on the odds-ratio scale for a fitted model.
# For each coefficient b with sampling variance v (the diagonal of
# vcov(model)), returns sqrt(exp(b)^2 * v), i.e. the delta-method standard
# error of exp(b). The result is an unnamed numeric vector in coefficient
# order.
get.or.se <- function(model) {
  coef_tbl <- broom::tidy(model)
  # tibble column assignment errors if the variance vector does not line up
  # with the coefficient table
  coef_tbl$var.diag <- diag(vcov(model))
  odds_ratio <- exp(coef_tbl$estimate)
  unname(sqrt(odds_ratio^2 * coef_tbl$var.diag))
}

# Odds-ratio standard errors, one vector per model, in column order
table5_or_se <- list(get.or.se(id_gen_fit),
                     get.or.se(id_gender_gen_fit),
                     get.or.se(three_way_fit))

# Extra rows at the bottom of the table: controls note and fit statistics
table5_fit_rows <- list(
  c("Controls Included", rep("Yes", 3)),
  c("Log Likelihood",
    round(logLik(id_gen_fit), 0),
    round(logLik(id_gender_gen_fit), 0),
    round(logLik(three_way_fit), 0)),
  c("Akaike Inf. Crit.",
    round(AIC(id_gen_fit), 0),
    round(AIC(id_gender_gen_fit), 0),
    round(AIC(three_way_fit), 0)),
  c("Bayesian Inf. Crit.",
    round(BIC(id_gen_fit), 0),
    round(BIC(id_gender_gen_fit), 0),
    round(BIC(three_way_fit), 0))
)

# Row labels, matching the coefficient order produced by `order` below
table5_labels <- c("Female", "Second+ Generation",
                   "Black", "Hispanic", "Asian", "Indigenous",
                   "Second+ Generation x Female",
                   "Female x Black", "Female x Hispanic",
                   "Female x Asian", "Female x Indigenous",
                   "Second+ Gen x Black", "Second+ Gen x Hispanic",
                   "Second+ Gen x Asian", "Second+ Gen x Indigenous",
                   "Female x Second+ Gen x Black",
                   "Female x Second+ Gen x Hispanic",
                   "Female x Second+ Gen x Asian",
                   "Female x Second+ Gen x Indigenous",
                   "Intercept")

# Coefficients are exponentiated for display (apply.coef = exp), with the
# supplied odds-ratio standard errors. The control variables are fitted but
# omitted from the printed table.
stargazer(id_gen_fit, id_gender_gen_fit, three_way_fit,
          title = "Generation and Odds of Multiracial Self-Identification",
          header = FALSE,
          digits = 3,
          intercept.bottom = TRUE,
          t.auto = FALSE,
          p.auto = FALSE,
          no.space = TRUE,
          apply.coef = exp,
          se = table5_or_se,
          dep.var.labels = c("Self-Identifies with Multiple Races"),
          column.labels = c("+ Generation", "Gender x Generation",
                            "Gender x Generation x Regime"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = table5_fit_rows,
          keep.stat = c("n"),
          omit = c("age", "region", "educ2cat", "logged.income", "xspanish"),
          order = c(2, 1, 3:6, 14:26, 7:13, 27),
          covariate.labels = table5_labels)


```


